Country level analysis¶

Load libraries¶

import warnings
from functools import partial

import covid_analysis.utils.paths as path
import janitor
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_flavor as pf
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns
from plotly.offline import init_notebook_mode

Set defaults for plots¶

# matplotlib
# The bundled seaborn styles were renamed to "seaborn-v0_8-*" in matplotlib 3.6
# and the old names were removed in 3.8, so use whichever name is available.
plt.style.use(
    "seaborn-v0_8-whitegrid"
    if "seaborn-v0_8-whitegrid" in plt.style.available
    else "seaborn-whitegrid"
)
plt.rcParams["figure.figsize"] = (10, 8)

# seaborn
sns.set_style("whitegrid")

# plotly
init_notebook_mode()
pio.templates.default = "plotly_white"
pd.options.plotting.backend = "plotly"

# Silence plotting warnings.
# NOTE(review): this filter has no category or module restriction, so it hides
# every warning raised for the rest of the session, not only plotting ones.
warnings.filterwarnings("ignore")

Utility functions¶

Define input directory¶

# Directory holding the processed CSV files that the cells below read.
input_dir = path.data_processed_dir()

Load data¶

Confirmed and deaths time series¶

# Cumulative confirmed cases and deaths per country and date.
# `date` is parsed during the read so it arrives as datetime64 in a single
# vectorised step instead of being converted value by value afterwards.
hopkins_tidy_cumulative_df = pd.read_csv(
    filepath_or_buffer=input_dir.joinpath("hopkins_tidy_cumulative.csv"),
    parse_dates=["date"],
)

hopkins_tidy_cumulative_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 118755 entries, 0 to 118754
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   country    118755 non-null  object        
 1   date       118755 non-null  datetime64[ns]
 2   confirmed  118755 non-null  int64         
 3   deaths     118755 non-null  int64         
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 3.6+ MB

Vaccination time series¶

# Vaccination time series per country; no date parsing is requested here,
# so `date` stays exactly as read from the CSV.
vaccination_tidy_cumulative_df = pd.read_csv(
    input_dir.joinpath("vaccination_country_cumulative.csv")
)

vaccination_tidy_cumulative_df.head(n=1)
country date doses_admin people_partially_vaccinated people_fully_vaccinated
0 Afghanistan 2021-02-22 0 0.0 0.0

Countries population metadata¶

# Country -> population lookup table used to compute per-capita rates.
countries_population_df = pd.read_csv(
    input_dir.joinpath("countries_population.csv")
)

countries_population_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   country     196 non-null    object 
 1   population  196 non-null    float64
dtypes: float64(1), object(1)
memory usage: 3.2+ KB

Pandemic behavior to date¶

Spread progression¶

# Animated world map of cumulative confirmed cases.
fig = (
    hopkins_tidy_cumulative_df
    # Thin the series to one record per country every four days.
    .groupby("country")
    .resample("4D", on="date")
    .first()
    .reset_index(drop=True)
    # Use ISO-formatted strings as the animation frame labels.
    .assign(date=lambda frame: frame["date"].dt.strftime("%Y-%m-%d"))
    .pipe(
        px.choropleth,
        locations="country",
        locationmode="country names",
        color="confirmed",
        animation_frame="date",
        color_continuous_scale="Plasma",
        hover_name="country",
        hover_data={"country": False},
        labels={
            "country": "Country",
            "confirmed": "Confirmed cases",
            "date": "Date",
        },
    )
    .update_geos(fitbounds="locations", visible=False)
    .update_layout(margin=dict(r=0, t=0, l=0, b=0))
)

# Shrink the per-frame duration configured on the first animation button.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1e-10
fig.show()

Death progression¶

# Animated world map of cumulative deaths.
fig = (
    hopkins_tidy_cumulative_df
    # Thin the series to one record per country every four days.
    .groupby("country")
    .resample("4D", on="date")
    .first()
    .reset_index(drop=True)
    .assign(
        # Use ISO-formatted strings as the animation frame labels.
        date=lambda df: df.date.dt.strftime("%Y-%m-%d")
    )
    .pipe(
        lambda df: px.choropleth(
            df,
            locations="country",
            locationmode="country names",
            color="deaths",
            animation_frame="date",
            color_continuous_scale='Plasma',
            hover_name="country",
            # The country is already the hover title; hide the duplicate
            # entry, matching the confirmed-cases map.
            hover_data=dict(
                country=False,
            ),
            labels=dict(
                country="Country",
                deaths="Deaths",
                date="Date"
            )
        )
    )
    .update_geos(
        fitbounds="locations",
        visible=False
    )
    .update_layout(
        margin={
            "r": 0,
            "t": 0,
            "l": 0,
            "b": 0
        }
    )
)

# Shrink the per-frame duration configured on the first animation button.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1e-10
fig.show()

Affected countries over time¶

# Area chart of how the set of countries with confirmed cases grew over time.
(
    hopkins_tidy_cumulative_df
    .remove_columns("deaths")
    # Keep only rows where cases have been reported.
    .filter_on("confirmed != 0")
    # First remaining row per country.
    # NOTE(review): this is the first-case date only if rows are already in
    # chronological order within each country; nothing here sorts them.
    .groupby("country")
    .head(1)
    # Number of countries whose first row falls on each date.
    .groupby("date")
    .count()
    .reset_index()
    .remove_columns("confirmed")
    .assign(
        # Running total of countries seen so far.
        n_affected_countries=lambda df: df.country.cumsum(),
        # Relative to the largest running total, so the curve tops out at 100%.
        percentage_affected_countries=lambda df: df.n_affected_countries / df.n_affected_countries.max() * 100
    )
    .pipe(
        lambda df: (
            px.area(
                df,
                x="date",
                y="percentage_affected_countries",
                labels=dict(
                    date="Date",
                    percentage_affected_countries="Percentage of affected countries"
                )
            )
            .update_layout(
                yaxis=dict(ticksuffix="%")
            )
        )
    )
)

Mortality Analysis¶

Create dataset¶

# Per-country totals joined with population, plus fatality and mortality rates.
worldwide_mortality_df = (
    hopkins_tidy_cumulative_df
    # No `how` is given, so countries missing from either table are dropped.
    .merge(
        countries_population_df,
        on="country"
    )
    # Optional filters, currently disabled:
    # .filter_on("population != 0 and population > 1e3")
    # .filter_on("confirmed > 1e3")
    # Last row per country.
    # NOTE(review): these are the latest totals only if rows are in
    # chronological order within each country; nothing here sorts them.
    .groupby("country")
    .last()
    .reset_index()
    .assign(
        # Deaths per confirmed case, in percent.
        fatality_rate=lambda df: df.deaths / df.confirmed * 100,
        # Deaths per inhabitant, in percent.
        mortality_rate=lambda df: df.deaths / df.population * 100
    )
    # "*_rate" is presumably a glob matching both rate columns — confirm
    # against the installed pyjanitor version.
    .select_columns(["country", "confirmed", "deaths", "population", "*_rate"])
)

Identify most affected countries¶

# Names of the ten countries with the highest death totals, largest first.
top_10_death_countries = worldwide_mortality_df.sort_values(
    by="deaths", ascending=False
)["country"].head(10)

Cumulative behavior of the countries¶

# Full time series restricted to the ten countries with the most deaths.
interest_countries_cumulative_df = (
    hopkins_tidy_cumulative_df
    .query("country in @top_10_death_countries")
)

# Heatmap of cumulative deaths: one row per country, one column per date.
(
    interest_countries_cumulative_df
    # Positional arguments are presumably index / names_from / values_from —
    # confirm against the installed pyjanitor version.
    .pivot_wider(
        "country",
        "date",
        "deaths"
    )
    .set_index("country")
    # Order the rows as in top_10_death_countries.
    .loc[top_10_death_countries]
    .pipe(
        lambda df: (
            px.imshow(
                df,
                labels=dict(
                    x="Date",
                    y="",
                    color="Number of deaths"
                )
            )
            .update_layout(
                margin={
                    "r": 0,
                    "t": 0,
                    "l": 0,
                    "b": 0
                }
            )  
        )
    ) 
)

Most affected countries by fatality rate¶

# Mortality figures restricted to the ten countries with the most deaths.
interest_countries_fatality_df = worldwide_mortality_df.query(
    "country in @top_10_death_countries"
)

# Horizontal bar chart of fatality rate, smallest value first in the data.
(
    interest_countries_fatality_df
    .assign(fatality_rate=lambda frame: frame["fatality_rate"].round(2))
    .sort_values(by="fatality_rate")
    .pipe(
        px.bar,
        x="fatality_rate",
        y="country",
        text="fatality_rate",
        labels={"fatality_rate": "Fatality rate", "country": ""},
        hover_name="country",
        hover_data={"country": False, "fatality_rate": False},
    )
    # Show the rounded rate on each bar with a percent sign appended.
    .update_traces(texttemplate="%{text}%")
)
def _fatality_scatter(df):
    """Build the log-log deaths vs. confirmed scatter with fatality-rate guides.

    Expects the columns ``country``, ``confirmed``, ``deaths``,
    ``fatality_rate`` and ``most_affected_by_death``; returns the figure.
    """
    fig = px.scatter(
        df,
        x="confirmed",
        y="deaths",
        color="most_affected_by_death",
        labels=dict(
            confirmed="Confirmed cases",
            deaths="Deaths",
            fatality_rate="Fatality rate"
        ),
        hover_name="country",
        hover_data=dict(
            fatality_rate=True,
            confirmed=False,
            deaths=False,
            most_affected_by_death=False
        ),
        log_x=True,
        log_y=True,
    )

    # One faint guide per constant fatality rate: deaths = rate * confirmed.
    guide_rates = (
        ("0.5%", 0.005),
        ("1%", 0.010),
        ("2%", 0.020),
        ("5%", 0.05),
        ("10%", 0.1),
    )
    for label, rate in guide_rates:
        fig.add_scatter(
            x=df.confirmed,
            y=df.confirmed * rate,
            showlegend=False,
            opacity=0.2,
            line=dict(color="gray"),
            # Only the rate label is shown when hovering over a guide.
            hovertemplate=f"<extra>{label}</extra>"
        )

    return fig.update_layout(
        title="Fatality rate",
        showlegend=False
    )


(
    worldwide_mortality_df
    .assign(
        fatality_rate=lambda df: df.fatality_rate.round(2),
        # Flag the ten countries with the most deaths so they get their own colour.
        most_affected_by_death=lambda df: (
            df.country.isin(top_10_death_countries)
        )
    )
    .pipe(_fatality_scatter)
)

Trajectories per country¶

Create dataset¶

# Cumulative totals per country and date, paired with the 7-day rolling mean of
# the row-to-row increments (`*_total` vs. `*_rolling` columns).
trajectories_countries_df = (
    interest_countries_cumulative_df
    .pipe(
        lambda df: (
            df
            .merge(
                (
                    df
                    # Select the columns explicitly so the string `country`
                    # key is never handed to `diff()`, which only makes sense
                    # for the numeric columns.
                    .groupby("country")[["date", "confirmed", "deaths"]]
                    .apply(
                        lambda sub_df: (
                            sub_df
                            .set_index("date")
                            # Increment with respect to the previous row.
                            .diff()
                            # Time-based window over the datetime index.
                            .rolling("7D")
                            .mean()
                        )
                    )
                    .reset_index()
                ),
                on=["country", "date"],
                suffixes=("_total", "_rolling")
            )
        )
    )
    .assign(
        date=lambda df: df.date.dt.strftime("%Y-%m-%d")
    )
    # The first row of each country has no previous row to diff against.
    .dropna()
)

trajectories_countries_df.head(1)
country date confirmed_total deaths_total confirmed_rolling deaths_rolling
1 Brazil 2020-01-23 0 0 0.0 0.0

Confirmed cases¶

# New confirmed cases (rolling mean) against the running total, one line per country.
(
    trajectories_countries_df
    .pipe(
        px.line,
        x="confirmed_total",
        y="confirmed_rolling",
        color="country",
        labels={
            "confirmed_total": "Total confirmed cases",
            "confirmed_rolling": "Confirmed new cases",
            "country": "Country",
            "date": "Date",
        },
        hover_name="country",
        hover_data={
            "date": True,
            "confirmed_total": True,
            "confirmed_rolling": True,
            "country": False,
        },
    )
    .update_layout(
        title="Confirmed COVID-19 trajectories (7-day moving average)",
        # Horizontal legend placed below the plotting area.
        legend={"orientation": "h", "yanchor": "bottom", "y": -0.3},
        margin=dict(r=0.7, l=0),
    )
)

Deaths¶

Linear scale¶

# New deaths (rolling mean) against the running total, linear axes.
(
    trajectories_countries_df
    .pipe(
        px.line,
        x="deaths_total",
        y="deaths_rolling",
        color="country",
        labels={
            "deaths_total": "Total deaths",
            "deaths_rolling": "New deaths",
            "country": "Country",
            "date": "Date",
        },
        hover_name="country",
        hover_data={
            "date": True,
            "deaths_total": True,
            "deaths_rolling": True,
            "country": False,
        },
    )
    .update_layout(
        title="Deaths COVID-19 trajectories (7-day moving average)",
        # Horizontal legend placed below the plotting area.
        legend={"orientation": "h", "yanchor": "bottom", "y": -0.3},
        margin=dict(r=0.7, l=0),
    )
)

Logarithmic scale (We are all in this together!)¶

# New deaths (rolling mean) against the running total, both axes logarithmic.
(
    trajectories_countries_df
    .pipe(
        px.line,
        x="deaths_total",
        y="deaths_rolling",
        color="country",
        labels={
            "deaths_total": "Total deaths",
            "deaths_rolling": "New deaths",
            "country": "Country",
            "date": "Date",
        },
        hover_name="country",
        hover_data={
            "date": True,
            "deaths_total": True,
            "deaths_rolling": True,
            "country": False,
        },
        log_x=True,
        log_y=True,
    )
    .update_layout(
        title="Deaths COVID-19 trajectories (7-day moving average)",
        # Horizontal legend placed below the plotting area.
        legend={"orientation": "h", "yanchor": "bottom", "y": -0.3},
        margin=dict(r=0.7, l=0),
    )
)

Vaccination per country¶

# Treemap of administered doses, one tile per country under a "World" root.
(
    vaccination_tidy_cumulative_df
    # Keep the last row per country after ordering by country and date.
    .sort_values(by=["country", "date"])
    .groupby(["country"])
    .tail(1)
    .assign(
        # Each country's share of all administered doses, in percent.
        percentage_accumulated_vaccines=lambda frame: (
            frame["doses_admin"] / frame["doses_admin"].sum() * 100
        ).round(2)
    )
    .pipe(
        px.treemap,
        path=[px.Constant("World"), "country"],
        values="doses_admin",
        hover_name="country",
        labels={
            "country": "Country",
            "percentage_accumulated_vaccines": "Percentage of accumulated vaccines",
            "doses_admin": "Administered doses",
            "people_partially_vaccinated": "People partially vaccinated",
            "people_fully_vaccinated": "People fully vaccinated",
        },
        hover_data={
            "country": False,
            "percentage_accumulated_vaccines": True,
            "people_partially_vaccinated": True,
            "people_fully_vaccinated": True,
        },
    )
)